USAboundaries::us_states
## function (map_date = NULL, resolution = c("low", "high"), states = NULL)
## {
## resolution <- match.arg(resolution)
## if (is.null(map_date)) {
## if (resolution == "low") {
## shp <- USAboundaries::states_contemporary_lores
## }
## else if (resolution == "high") {
## check_data_package()
## shp <- USAboundariesData::states_contemporary_hires
## }
## shp <- filter_by_states(shp, states)
## }
## else {
## map_date <- as.Date(map_date)
## stopifnot(as.Date("1783-09-03") <= map_date, map_date <=
## as.Date("2000-12-31"))
## check_data_package()
## if (resolution == "low") {
## shp <- USAboundariesData::states_historical_lores
## }
## else if (resolution == "high") {
## shp <- USAboundariesData::states_historical_hires
## }
## shp <- filter_by_date(shp, map_date)
## shp <- filter_by_states(shp, states)
## }
## shp
## }
## <bytecode: 0x7ff13456f8b0>
## <environment: namespace:USAboundaries>
conus = USAboundaries::us_states() %>%
filter(!state_name %in% c("Puerto Rico",
"Alaska",
"Hawaii"))
counties = USAboundaries::us_counties() %>%
st_as_sf(counties) %>%
filter(!state_name %in% c("Puerto Rico",
"Alaska",
"Hawaii")) %>%
st_transform(counties, crs = 5070)
county_centroid = st_centroid(counties) %>%
st_union()
vor_tess = st_voronoi(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
tri_tess = st_triangulate(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
gridded_counties = st_make_grid(county_centroid, n = 70, square = T) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
hex_counties = st_make_grid(county_centroid, n=70, square = F) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id=1:n())
vor_tess = st_intersection(vor_tess, st_union(counties))
tri_tess = st_intersection(tri_tess, st_union(counties))
gridded_counties = st_intersection(gridded_counties, st_union(counties))
hex_counties = st_intersection(hex_counties, st_union(counties))
counties_simp = rmapshaper::ms_simplify(counties, keep = .005)
mapview::npts(counties)
## [1] 51976
mapview::npts(counties_simp)
## [1] 21023
Roughly 50% of the points were removed. This makes the edge of the border less accurate, but makes the computations faster.
plot_tess = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, fill = "white", col = "navy", size = .2)+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", nrow(sf_obj), "features." ))
}
plot_tess(vor_tess, "Voronoi Tessellation")
plot_tess(tri_tess, "Triangulated Tessellation")
plot_tess(gridded_counties, "Gridded Coverage")
plot_tess(hex_counties, "Hexagonal Coverage")
plot_tess(counties_simp, "Original")
tess_sum = function(sf_obj, text){
area = st_area(sf_obj) %>%
units::set_units("km^2") %>%
units::drop_units()
data.frame(num_features = nrow(sf_obj),
mean_area = mean(area),
sd_area = sd(area),
total_area = sum(area),
text = text
)
}
vor_sum_tess = tess_sum(vor_tess, "Voronoi")
tri_sum_tess = tess_sum(tri_tess, "Triangulation")
hex_sum = tess_sum(hex_counties, "Hexagon")
grid_sum = tess_sum(gridded_counties, "Grid")
summary_tess = bind_rows(
tess_sum(counties_simp, "Original Counties"),
tess_sum(tri_tess, "Triangulation"),
tess_sum(vor_tess, "Voronoi"),
tess_sum(hex_counties, "Hexagon"),
tess_sum(gridded_counties, "Grid")
)
knitr::kable(summary_tess,
caption = "Five Tessellation summaries",
col.names = c("Number of Features", "Mean Area", "Standard Deviation of Features", "Total Area", "Text"),
format.args = list(big.marks = ","))
| Number of Features | Mean Area | Standard Deviation of Features | Total Area | Text |
|---|---|---|---|---|
| 3069 | 2539.396 | 3455.0175 | 7793407 | Original Counties |
| 6196 | 1251.808 | 1575.7996 | 7756200 | Triangulation |
| 3108 | 2521.745 | 2887.1397 | 7837583 | Voronoi |
| 2381 | 3290.644 | 835.6690 | 7835024 | Hexagon |
| 3292 | 2374.481 | 559.3614 | 7816790 | Grid |
For each of the 5 tessellations, the area and number of polygons changed. This will cause the MAUP to change. The total area was largest with the grid coverage tessellation.The triangulation tessellation had the most number of polygons, and the hexagonal coverage had the about half the number of polygons.
dams <- read_excel("~/github/geog-13-labs/data/NID2019_U.xlsx")
dams_sf = dams %>%
filter(!is.na(LONGITUDE), !is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE","LATITUDE"), crs = 4326) %>%
st_transform(5070)
pip_func = function(points, polygon, id){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get('id')) %>%
setNames(c(id, "n")) %>%
left_join(polygon, by = id) %>%
st_as_sf()
}
pip_func_county_simp = function(points, polygon, id){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get('geoid')) %>%
setNames(c(id, "n")) %>%
left_join(polygon, by = id) %>%
st_as_sf()
}
vor_pip = pip_func(dams_sf, vor_tess, "id")
tri_pip = pip_func(dams_sf, tri_tess, "id")
gridded_pip = pip_func(dams_sf, gridded_counties, "id")
hex_pip = pip_func(dams_sf, hex_counties, "id")
counties_simp_pip = pip_func_county_simp(dams_sf, counties_simp, "geoid")
plot_pip = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, aes(fill = log(n)))+
scale_fill_viridis_c()+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))
}
plot_pip(vor_pip, "Voronoi")
plot_pip(tri_pip, "triangulation")
plot_pip(gridded_pip, "gridded")
plot_pip(hex_pip, "hexagonal")
plot_pip(counties_simp_pip, "original")
The more polygons each tessellation had, the greater number of dams there were in each section. Moving forward, I will be using the hexagonal tessellation.
dam_freq = strsplit(dams_sf$PURPOSES, split = "") %>%
unlist() %>%
table() %>%
as.data.frame() %>%
setNames(c("abbr", "count"))
r_grep = grepl("R", dams_sf$PURPOSES[1:20])
s_grep = grepl("S", dams_sf$PURPOSES[1:20])
h_grep = grepl("H", dams_sf$PURPOSES[1:20])
d_grep = grepl("D", dams_sf$PURPOSES[1:20])
plot_pip = function(sf_obj, title){
ggplot()+
geom_sf(data = sf_obj, aes(fill = log(n)))+
scale_fill_viridis_c()+
theme_void() +
labs(title = title, caption = paste("This tessellation has:", sum(sf_obj$n), "dams." ))+
gghighlight::gghighlight(n>=(mean(n)+sd(n)))
}
rec = dams_sf %>%
filter(grepl("R", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
s = dams_sf %>%
filter(grepl("S", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
h = dams_sf %>%
filter(grepl("H", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
d = dams_sf %>%
filter(grepl("D", PURPOSES)) %>%
pip_func(vor_tess, "id") %>%
plot_pip(vor_tess)+
gghighlight::gghighlight(n >= (mean(n)+sd(n)))
rec
s
h
d
The dams found align with major lakes and rivers, such as the Mississippi. Therefore, the geographic distribution does “make sense”. I feel that the original tessellation would provide better results, but the hexagonal provides results closest to the original.